## Used for parallelization; set start_index from 1 to 4 to get 
# the models run in the paper
# 1-4 are white treat/control then black treat/control

# start_index <- commandArgs(trailingOnly = TRUE)
start_index <- 1

## Required packages ----
library(readr)
library(grf)
library(xtable)
library(PLCE)
library(tictoc)

## Load data ----
data = read_csv('data.turnout.csv')

# set some data as factors for use below
data$reg = as.Date(data$reg)
data$p = as.factor(data$p)
data$s = as.factor(data$s)

# distances used repeatedly in estimation below
dists = 500
namepcts = .99

####################################
use.data = data[data$reg<"2000-10-10"&is.na(data$reg)==F,]
use.data$s<-1*(use.data$s=="F")
use.data$demo.distance<-use.data$demo.distance/1000


useW = use.data[use.data$whitename>=.975,]
useB = use.data[use.data$blackname>=.975,]


control.set<-c("demo.gid","vote1996", "vote1998","s","age","age.squared","medianincome", "prior.avg.value","deeded.strict", "p")

useW<-na.omit(useW[,c("vote2004","vote2000",control.set,"demo.distance","nondemo.distance")])
useB<-na.omit(useB[,c("vote2004","vote2000",control.set,"demo.distance","nondemo.distance")])


Wtreat0 = Wtreat = useW[useW$demo.distance<=1.00,]
Btreat0 = Btreat = useB[useB$demo.distance<=1.00,]
Wcont0 = Wcont = useW[useW$demo.distance>1.00 &(useW$nondemo.distance>1000),]
Bcont0 = Bcont = useB[useB$demo.distance>1.00 &(useB$nondemo.distance>1000),] 

make.numeric <- function(z) {
  z <- as.matrix(z)
  z <- apply(z, 2, as.numeric)
  as.matrix(z)
}

# Check results
head(Wtreat[,control.set][,-1])
head(make.numeric(Wtreat[,control.set][,-1]))


# Random forest for balancing 
set.seed(1)
r1<-regression_forest(Wtreat$demo.distance, X=make.numeric(Wtreat[,control.set][,-1]))
save(r1,file="w_genpropscore97.rda")
load("w_genpropscore97.rda")
p1W<-predict(r1)[,1]
p1B<-predict(r1,newdata=make.numeric(Btreat[,control.set[-1]]))[,1]

p0W<-predict(r1,newdata=make.numeric(Wcont[,control.set[-1]]))[,1]
p0B<-predict(r1,newdata=make.numeric(Bcont[,control.set[-1]]))[,1]

# Quick function for matching
findmin<-function(x,p){
  diffs<-abs(x-p)
  which(diffs==min(diffs))[1]
}

# Matches
matches1B<-sapply(p1W, findmin, p=p1B)
matches0W<-sapply(p1W, findmin, p=p0W)
matches1W<-sapply(p1W, findmin, p=p1W)

Btreat = Btreat[matches1B,]
Wcont = Wcont[matches0W,]
Bcont = Bcont[matches1W,]

## Function for running PLCE
runhoe<-function(obj){
  set.seed(1)
  obj<-as.matrix(obj)
  obj <- make.numeric(obj)
  plce(y=obj[,"vote2004"]-obj[,"vote2000"],treat=obj[,"demo.distance"],X=make.numeric(obj[,control.set[-1]]), id=obj[,"demo.gid"],var.type="HC3",num.fit = 50)

  }

tic("running PLCE")
if(start_index==1) {
    h1W<-runhoe(Wtreat)
    save(h1W,file="W1.Rda")
    }
    
    if(start_index==2) {
    h2W<-runhoe(Wcont)
    save(h2W,file="W2.Rda")
    }
    
    if(start_index==3) {
    h1B<-runhoe(Btreat)
    save(h1B,file="B1.Rda")
    }
    
    if(start_index==4) {
    h2B<-runhoe(Bcont)
    save(h2B,file="B2.Rda")
    }
toc()
    

